<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta charset="utf-8">
<title>Monitoring Individual Nodes | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="cluster-admin.html" title="Monitoring">
<link rel="prev" href="_cluster_health.html" title="Cluster Health">
<link rel="next" href="_cluster_stats.html" title="Cluster Stats">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- Resets the global dataLayer array, then provides a no-JavaScript fallback iframe.
         The iframe is hidden; the title gives it an accessible name. -->
    <script>dataLayer = [];</script><noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-58RLH5" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <!-- Loader: pushes a gtm.start event onto the data layer and inserts an async
         gtm.js script element before the first script element in the document.
         URLs are explicit https rather than protocol-relative. -->
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <!-- Loads gtag.js asynchronously for property UA-12395217-16. The inline script
         reuses window.dataLayer if it already exists (otherwise creates an empty array),
         defines gtag() to push its arguments object onto that array, and queues the
         initial 'js' (current timestamp) and 'config' calls. -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <!-- Minified vendor snippet; keep the code unchanged. It defines a helper with:
           get(name)        reads a cookie value from document.cookie, or null if absent
           set(name, value) writes a cookie with path=/ that expires 6048E5 ms (7 days) from now
           check()          uses that cookie and the configured percentage to decide whether to load
                            (NOTE(review): appears to be a sampling gate; confirm against Qualtrics docs)
           go()             when check() passes, appends a script element pointing at the intercept URL to document.body
           start()          defers go() until the window load event
         The helper is instantiated with (100, "r", cookie name, intercept URL) and started inside
         a try/catch that ignores any error. The empty div after the script is a vendor placeholder. -->
    <script type="text/javascript">
      (function(){var g=function(e,h,f,g){
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="administration.html">Administration, Monitoring, and Deployment</a></span>
»
<span class="breadcrumb-link"><a href="cluster-admin.html">Monitoring</a></span>
»
<span class="breadcrumb-node">Monitoring Individual Nodes</span>
</div>
<div class="navheader">
<span class="prev">
<a href="_cluster_health.html">« Cluster Health</a>
</span>
<span class="next">
<a href="_cluster_stats.html">Cluster Stats »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="_monitoring_individual_nodes"></a>Monitoring Individual Nodes<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/500_Cluster_Admin/30_node_stats.asciidoc">edit</a>
</h2>
</div></div></div>
<p><code class="literal">Cluster-health</code> is at one end of the spectrum—​a very high-level overview of
everything in your cluster.  The <code class="literal">node-stats</code> API is at the other end.  It provides
a bewildering array of statistics about each node in your cluster.</p>
<p><code class="literal">Node-stats</code> provides so many stats that, until you are accustomed to the output,
you may be unsure which metrics are most important to keep an eye on.  We’ll
highlight the most important metrics to monitor (but we encourage you to
log all the metrics provided—​or use Marvel—​because you’ll never know when
you need one stat or another).</p>
<p>The <code class="literal">node-stats</code> API can be executed with the following:</p>
<div class="pre_wrapper lang-bash">
<pre class="programlisting prettyprint lang-bash">GET _nodes/stats</pre>
</div>
<p>Starting at the top of the output, we see the cluster name and our first node:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">{
   "cluster_name": "elasticsearch_zach",
   "nodes": {
      "UNr6ZMf5Qk-YCPA_L18BOQ": {
         "timestamp": 1408474151742,
         "name": "Zach",
         "transport_address": "inet[zacharys-air/192.168.1.131:9300]",
         "host": "zacharys-air",
         "ip": [
            "inet[zacharys-air/192.168.1.131:9300]",
            "NONE"
         ],
...</pre>
</div>
<p>The nodes are listed in a hash, with the key being the UUID of the node.  Some
information about the node’s network properties is displayed (such as transport address
and host). These values are useful for debugging discovery problems, where
nodes won’t join the cluster. Often you’ll see that the port being used is wrong,
or the node is binding to the wrong IP address/interface.</p>
<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_indices_section"></a>indices Section<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/500_Cluster_Admin/30_node_stats.asciidoc">edit</a>
</h3>
</div></div></div>
<p>The <code class="literal">indices</code> section lists aggregate statistics for all the indices that reside
on this particular node:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">    "indices": {
        "docs": {
           "count": 6163666,
           "deleted": 0
        },
        "store": {
           "size_in_bytes": 2301398179,
           "throttle_time_in_millis": 122850
        },</pre>
</div>
<p>The returned statistics are grouped into the following sections:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<code class="literal">docs</code> shows how many documents reside on
this node, as well as the number of deleted docs that haven’t been purged
from segments yet.
</li>
<li class="listitem">
The <code class="literal">store</code> portion indicates how much physical storage is consumed by the node.
This metric includes both primary and replica shards.  If the throttle time is
large, it may be an indicator that your disk throttling is set too low
(discussed in <a class="xref" href="indexing-performance.html#segments-and-merging" title="Segments and Merging">Segments and Merging</a>).
</li>
</ul>
</div>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">        "indexing": {
           "index_total": 803441,
           "index_time_in_millis": 367654,
           "index_current": 99,
           "delete_total": 0,
           "delete_time_in_millis": 0,
           "delete_current": 0
        },
        "get": {
           "total": 6,
           "time_in_millis": 2,
           "exists_total": 5,
           "exists_time_in_millis": 2,
           "missing_total": 1,
           "missing_time_in_millis": 0,
           "current": 0
        },
        "search": {
           "open_contexts": 0,
           "query_total": 123,
           "query_time_in_millis": 531,
           "query_current": 0,
           "fetch_total": 3,
           "fetch_time_in_millis": 55,
           "fetch_current": 0
        },
        "merges": {
           "current": 0,
           "current_docs": 0,
           "current_size_in_bytes": 0,
           "total": 1128,
           "total_time_in_millis": 21338523,
           "total_docs": 7241313,
           "total_size_in_bytes": 5724869463
        },</pre>
</div>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<p><code class="literal">indexing</code> shows the number of docs that have been indexed.  This value is a monotonically
increasing counter; it doesn’t decrease when docs are deleted.  Also note that it
is incremented anytime an <em>index</em> operation happens internally, which includes
things like updates.</p>
<p>Also listed are times for indexing, the number of docs currently being indexed,
and similar statistics for deletes.</p>
</li>
<li class="listitem">
<code class="literal">get</code> shows statistics about get-by-ID requests.  This includes <code class="literal">GET</code> and
<code class="literal">HEAD</code> requests for a single document.
</li>
<li class="listitem">
<p><code class="literal">search</code> describes the number of active searches (<code class="literal">open_contexts</code>), number of
queries total, and the amount of time spent on queries since the node was
started.  The ratio between <code class="literal">query_time_in_millis / query_total</code> can be used as a
rough indicator for how efficient your queries are.  The larger the ratio,
the more time each query is taking, and you should consider tuning or optimization.</p>
<p>The fetch statistics detail the second half of the query process (the <em>fetch</em> in
query-then-fetch).  If more time is spent in fetch than query, this can be an
indicator of slow disks or very large documents being fetched, or
potentially search requests with paginations that are too large (for example, <code class="literal">size: 10000</code>).</p>
</li>
<li class="listitem">
<p><code class="literal">merges</code> contains information about Lucene segment merges.  It will tell you
the number of merges that are currently active, the number of docs involved, the cumulative
size of segments being merged, and the amount of time spent on merges in total.</p>
<p>Merge statistics can be important if your cluster is write heavy.  Merging consumes
a large amount of disk I/O and CPU resources.  If your index is write heavy and
you see large merge numbers, be sure to read <a class="xref" href="indexing-performance.html" title="Indexing Performance Tips">Indexing Performance Tips</a>.</p>
<p>Note: updates and deletes will contribute to large merge numbers too, since they
cause segment <em>fragmentation</em> that needs to be merged out eventually.</p>
</li>
</ul>
</div>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">        "filter_cache": {
           "memory_size_in_bytes": 48,
           "evictions": 0
        },
        "fielddata": {
           "memory_size_in_bytes": 0,
           "evictions": 0
        },
        "segments": {
           "count": 319,
           "memory_in_bytes": 65812120
        },
        ...</pre>
</div>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<p><code class="literal">filter_cache</code> indicates the amount of memory used by the cached filter bitsets,
and the number of times a filter has been evicted.  A large number of evictions
<em>could</em> indicate that you need to increase the filter cache size, or that
your filters are not caching well (for example, they are churning heavily because of high cardinality,
such as caching <code class="literal">now</code> date expressions).</p>
<p>However, evictions are a difficult metric to evaluate.  Filters are cached on a
per-segment basis, and evicting a filter from a small segment is much less
expensive than evicting a filter from a large segment.  It’s possible that you have many evictions, but they all occur on small segments, which means they have
little impact on query performance.</p>
<p>Use the eviction metric as a rough guideline.  If you see a large number, investigate
your filters to make sure they are caching well.  Filters that constantly evict,
even on small segments, will be much less effective than properly cached filters.</p>
</li>
<li class="listitem">
<code class="literal">fielddata</code> displays the memory used by fielddata, which is used for aggregations,
sorting, and more.  There is also an eviction count.  Unlike <code class="literal">filter_cache</code>, the eviction
count here is useful:  it should be zero or very close.  Since field data
is not a cache, any eviction is costly and should be avoided.  If you see
evictions here, you need to reevaluate your memory situation, fielddata limits,
queries, or all three.
</li>
<li class="listitem">
<p><code class="literal">segments</code> will tell you the number of Lucene segments this node currently serves.
This can be an important number.  Most indices should have around 50–150 segments,
even if they are terabytes in size with billions of documents.  Large numbers
of segments can indicate a problem with merging (for example, merging is not keeping up
with segment creation).  Note that this statistic is the aggregate total of all
indices on the node, so keep that in mind.</p>
<p>The <code class="literal">memory</code> statistic gives you an idea of the amount of memory being used by the
Lucene segments themselves.  This includes low-level data structures such as
posting lists, dictionaries, and bloom filters.  A very large number of segments
will increase the amount of overhead lost to these data structures, and the memory
usage can be a handy metric to gauge that overhead.</p>
</li>
</ul>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_os_and_process_sections"></a>OS and Process Sections<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/500_Cluster_Admin/30_node_stats.asciidoc">edit</a>
</h3>
</div></div></div>
<p>The <code class="literal">OS</code> and <code class="literal">Process</code> sections are fairly self-explanatory and won’t be covered
in great detail.  They list basic resource statistics such as CPU and load.  The
<code class="literal">OS</code> section describes them for the entire <code class="literal">OS</code>, while the <code class="literal">Process</code> section shows just
what the Elasticsearch JVM process is using.</p>
<p>These are obviously useful metrics, but are often being measured elsewhere in your
monitoring stack. Some stats include the following:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
CPU
</li>
<li class="listitem">
Load
</li>
<li class="listitem">
Memory usage
</li>
<li class="listitem">
Swap usage
</li>
<li class="listitem">
Open file descriptors
</li>
</ul>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_jvm_section"></a>JVM Section<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/500_Cluster_Admin/30_node_stats.asciidoc">edit</a>
</h3>
</div></div></div>
<p>The <code class="literal">jvm</code> section contains some critical information about the JVM process that
is running Elasticsearch.  Most important, it contains garbage collection details,
which have a large impact on the stability of your Elasticsearch cluster.</p>
<div class="sidebar">
<a id="garbage_collector_primer"></a>
<div class="titlepage"><div><div>
<p class="title"><strong>Garbage Collection Primer</strong></p>
</div></div></div>
<p>Before we describe the stats, it is useful to give a crash course in garbage
collection and its impact on Elasticsearch.  If you are familiar with garbage
collection in the JVM, feel free to skip down.</p>
<p>Java is a <em>garbage-collected</em> language, which means that the programmer does
not manually manage memory allocation and deallocation.  The programmer simply
writes code, and the Java Virtual Machine (JVM) manages the process of allocating
memory as needed, and then later cleaning up that memory when no longer needed.</p>
<p>When memory is allocated to a JVM process, it is allocated in a big chunk called
the <em>heap</em>.  The JVM then breaks the heap into two groups, referred to as
<em>generations</em>:</p>
<div class="variablelist">
<dl class="variablelist">
<dt>
<span class="term">
Young (or Eden)
</span>
</dt>
<dd>
    The space where newly instantiated objects are allocated. The
young generation space is often quite small, usually 100 MB–500 MB.  The young-gen
also contains two <em>survivor</em> spaces.
</dd>
<dt>
<span class="term">
Old
</span>
</dt>
<dd>
    The space where older objects are stored.  These objects are expected to be long-lived
and persist for a long time.  The old-gen is often much larger than the young-gen,
and Elasticsearch nodes can see old-gens as large as 30 GB.
</dd>
</dl>
</div>
<p>When an object is instantiated, it is placed into young-gen.  When the young
generation space is full, a young-gen garbage collection (GC) is started.  Objects that are still
"alive" are moved into one of the survivor spaces, and "dead" objects are removed.
If an object has survived several young-gen GCs, it will be "tenured" into the
old generation.</p>
<p>A similar process happens in the old generation:  when the space becomes full, a
garbage collection is started and dead objects are removed.</p>
<p>Nothing comes for free, however.  Both the young- and old-generation garbage collectors
have phases that "stop the world."  During this time, the JVM literally halts
execution of the program so it can trace the object graph and collect dead
objects. During this stop-the-world phase, nothing happens.  Requests are not serviced,
pings are not responded to, shards are not relocated.  The world quite literally
stops.</p>
<p>This isn’t a big deal for the young generation; its small size means GCs execute
quickly.  But the old-gen is quite a bit larger, and a slow GC here could mean
1s or even 15s of pausing—​which is unacceptable for server software.</p>
<p>The garbage collectors in the JVM are <em>very</em> sophisticated algorithms and do
a great job minimizing pauses.  And Elasticsearch tries very hard to be <em>garbage-collection friendly</em>, by intelligently reusing objects internally, reusing network
buffers, and enabling <a class="xref" href="docvalues.html" title="Doc Values">Doc Values</a> by default.  But ultimately,
GC frequency and duration are metrics that you need to watch, since GC
is the number one culprit for cluster instability.</p>
<p>A cluster that is frequently experiencing long GC will be a cluster that is under
heavy load with not enough memory.  These long GCs will make nodes drop off the
cluster for brief periods.  This instability causes shards to relocate frequently
as Elasticsearch tries to keep the cluster balanced and enough replicas available.  This in
turn increases network traffic and disk I/O, all while your cluster is attempting
to service the normal indexing and query load.</p>
<p>In short, long GCs are bad and need to be minimized as much as possible.</p>
</div>
<p>Because garbage collection is so critical to Elasticsearch, you should become intimately
familiar with this section of the <code class="literal">node-stats</code> API:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">        "jvm": {
            "timestamp": 1408556438203,
            "uptime_in_millis": 14457,
            "mem": {
               "heap_used_in_bytes": 457252160,
               "heap_used_percent": 44,
               "heap_committed_in_bytes": 1038876672,
               "heap_max_in_bytes": 1038876672,
               "non_heap_used_in_bytes": 38680680,
               "non_heap_committed_in_bytes": 38993920,</pre>
</div>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<p>The <code class="literal">jvm</code> section first lists some general stats about heap memory usage.  You
can see how much of the heap is being used, how much is committed (actually allocated
to the process), and the max size the heap is allowed to grow to.  Ideally,
<code class="literal">heap_committed_in_bytes</code> should be identical to <code class="literal">heap_max_in_bytes</code>.  If the
committed size is smaller, the JVM will have to resize the heap eventually—​and this is a very expensive process.  If your numbers are not identical, see
<a class="xref" href="heap-sizing.html" title="Heap: Sizing and Swapping">Heap: Sizing and Swapping</a> for how to configure it correctly.</p>
<p>The <code class="literal">heap_used_percent</code> metric is a useful number to keep an eye on.  Elasticsearch
is configured to initiate GCs when the heap reaches 75% full.  If your node is
consistently &gt;= 75%, your node is experiencing <em>memory pressure</em>.
This is a warning sign that slow GCs may be in your near future.</p>
<p>If the heap usage is consistently &gt;=85%, you are in trouble.  Heaps over 90–95%
are at risk of horrible performance with long 10–30s GCs at best, and out-of-memory
(OOM) exceptions at worst.</p>
</li>
</ul>
</div>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">   "pools": {
      "young": {
         "used_in_bytes": 138467752,
         "max_in_bytes": 279183360,
         "peak_used_in_bytes": 279183360,
         "peak_max_in_bytes": 279183360
      },
      "survivor": {
         "used_in_bytes": 34865152,
         "max_in_bytes": 34865152,
         "peak_used_in_bytes": 34865152,
         "peak_max_in_bytes": 34865152
      },
      "old": {
         "used_in_bytes": 283919256,
         "max_in_bytes": 724828160,
         "peak_used_in_bytes": 283919256,
         "peak_max_in_bytes": 724828160
      }
   }
},</pre>
</div>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
The <code class="literal">young</code>, <code class="literal">survivor</code>, and <code class="literal">old</code> sections will give you a breakdown of memory
usage of each generation in the GC.  These stats are handy for keeping an eye on
relative sizes, but are often not overly important when debugging problems.
</li>
</ul>
</div>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">"gc": {
   "collectors": {
      "young": {
         "collection_count": 13,
         "collection_time_in_millis": 923
      },
      "old": {
         "collection_count": 0,
         "collection_time_in_millis": 0
      }
   }
}</pre>
</div>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<p>The <code class="literal">gc</code> section shows the garbage collection counts and cumulative time for both
young and old generations.  You can safely ignore the young generation counts
for the most part:  this number will usually be large.  That is perfectly
normal.</p>
<p>In contrast, the old generation collection count should remain small, and
have a small <code class="literal">collection_time_in_millis</code>.  These are cumulative counts, so it is
hard to give an exact number when you should start worrying (for example, a node with a
one-year uptime will have a large count even if it is healthy). This is one of the
reasons that tools such as Marvel are so helpful.  GC counts <em>over time</em> are the
important consideration.</p>
<p>Time spent GC’ing is also important.  For example, a certain amount of garbage
is generated while indexing documents.  This is normal and causes a GC every
now and then. These GCs are almost always fast and have little effect on the
node: young generation takes a millisecond or two, and old generation takes
a few hundred milliseconds.  This is much different from 10-second GCs.</p>
<p>Our best advice is to collect collection counts and duration periodically (or use Marvel)
and keep an eye out for frequent GCs.  You can also enable slow-GC logging,
discussed in <a class="xref" href="logging.html" title="Logging">Logging</a>.</p>
</li>
</ul>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_threadpool_section"></a>Threadpool Section<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/500_Cluster_Admin/30_node_stats.asciidoc">edit</a>
</h3>
</div></div></div>
<p>Elasticsearch maintains threadpools internally.  These threadpools
cooperate to get work done, passing work between each other as necessary. In
general, you don’t need to configure or tune the threadpools, but it is sometimes
useful to see their stats so you can gain insight into how your cluster is behaving.</p>
<p>There are about a dozen threadpools, but they all share the same format:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">  "index": {
     "threads": 1,
     "queue": 0,
     "active": 0,
     "rejected": 0,
     "largest": 1,
     "completed": 1
  }</pre>
</div>
<p>Each threadpool lists the number of threads that are configured (<code class="literal">threads</code>),
how many of those threads are actively processing some work (<code class="literal">active</code>), and how
many work units are sitting in a queue (<code class="literal">queue</code>).</p>
<p>If the queue fills up to its limit, new work units will begin to be rejected, and
you will see that reflected in the <code class="literal">rejected</code> statistic.  This is often a sign
that your cluster is starting to bottleneck on some resources, since a full
queue means your node/cluster is processing at maximum speed but unable to keep
up with the influx of work.</p>
<div class="sidebar">
<div class="titlepage"><div><div>
<p class="title"><strong>Bulk Rejections</strong></p>
</div></div></div>
<p>If you are going to encounter queue rejections, it will most likely be caused
by bulk indexing requests.  It is easy to send many bulk requests to Elasticsearch
by using concurrent import processes.  More is better, right?</p>
<p>In reality, each cluster has a certain limit at which it cannot keep up with
ingestion.  Once this threshold is crossed, the queue will quickly fill up, and
new bulks will be rejected.</p>
<p>This is a <em>good thing</em>.  Queue rejections are a useful form of back pressure.  They
let you know that your cluster is at maximum capacity, which is much better than
sticking data into an in-memory queue.  Increasing the queue size doesn’t increase
performance; it just hides the problem.  If your cluster can process only 10,000
docs per second, it doesn’t matter whether the queue is 100 or 10,000,000—​your cluster can
still process only 10,000 docs per second.</p>
<p>The queue simply hides the performance problem and carries a real risk of data loss.
Anything sitting in a queue is by definition not processed yet.  If the node
goes down, all those requests are lost forever.  Furthermore, the queue eats
up a lot of memory, which is not ideal.</p>
<p>It is much better to handle queuing in your application by gracefully handling
the back pressure from a full queue.  When you receive bulk rejections, you should take these steps:</p>
<div class="olist orderedlist">
<ol class="orderedlist">
<li class="listitem">
Pause the import thread for 3–5 seconds.
</li>
<li class="listitem">
Extract the rejected actions from the bulk response, since it is probable that
many of the actions were successful. The bulk response will tell you which succeeded
and which were rejected.
</li>
<li class="listitem">
Send a new bulk request with just the rejected actions.
</li>
<li class="listitem">
Repeat from step 1 if rejections are encountered again.
</li>
</ol>
</div>
<p>Using this procedure, your code naturally adapts to the load of your cluster and
naturally backs off.</p>
<p>Rejections are not errors: they just mean you should try again later.</p>
</div>
<p>There are about a dozen threadpools.  Most you can safely ignore, but a few
are good to keep an eye on:</p>
<div class="variablelist">
<dl class="variablelist">
<dt>
<span class="term">
<code class="literal">index</code>
</span>
</dt>
<dd>
Threadpool for normal indexing requests
</dd>
<dt>
<span class="term">
<code class="literal">bulk</code>
</span>
</dt>
<dd>
Bulk requests, which are distinct from the nonbulk indexing requests
</dd>
<dt>
<span class="term">
<code class="literal">get</code>
</span>
</dt>
<dd>
Get-by-ID operations
</dd>
<dt>
<span class="term">
<code class="literal">search</code>
</span>
</dt>
<dd>
All search and query requests
</dd>
<dt>
<span class="term">
<code class="literal">merging</code>
</span>
</dt>
<dd>
Threadpool dedicated to managing Lucene merges
</dd>
</dl>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_fs_and_network_sections"></a>FS and Network Sections<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/500_Cluster_Admin/30_node_stats.asciidoc">edit</a>
</h3>
</div></div></div>
<p>Continuing down the <code class="literal">node-stats</code> API, you’ll see a bunch of statistics about your
filesystem:  free space, data directory paths, disk I/O stats, and more.  If you are
not monitoring free disk space, you can get those stats here.  The disk I/O stats
are also handy, but often more specialized command-line tools (<code class="literal">iostat</code>, for example)
are more useful.</p>
<p>Obviously, Elasticsearch has a difficult time functioning if you run out of disk
space—​so make sure you don’t.</p>
<p>There are also two sections on network statistics:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">        "transport": {
            "server_open": 13,
            "rx_count": 11696,
            "rx_size_in_bytes": 1525774,
            "tx_count": 10282,
            "tx_size_in_bytes": 1440101928
         },
         "http": {
            "current_open": 4,
            "total_opened": 23
         },</pre>
</div>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<code class="literal">transport</code> shows some basic stats about the <em>transport address</em>.  This
relates to inter-node communication (often on port 9300) and any transport client
or node client connections.  Don’t worry if you see many connections here;
Elasticsearch maintains a large number of connections between nodes.
</li>
<li class="listitem">
<code class="literal">http</code> represents stats about the HTTP port (often 9200).  If you see a very
large <code class="literal">total_opened</code> number that is constantly increasing, that is a sure sign
that one of your HTTP clients is not using keep-alive connections.  Persistent,
keep-alive connections are important for performance, since building up and tearing
down sockets is expensive (and wastes file descriptors).  Make sure your clients
are configured appropriately.
</li>
</ul>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_circuit_breaker"></a>Circuit Breaker<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/500_Cluster_Admin/30_node_stats.asciidoc">edit</a>
</h3>
</div></div></div>
<p>Finally, we come to the last section: stats about the fielddata circuit breaker
(introduced in <a class="xref" href="_limiting_memory_usage.html#circuit-breaker" title="Circuit Breaker">Circuit Breaker</a>):</p>
<div class="pre_wrapper lang-js pagebreak-before">
<pre class="programlisting prettyprint lang-js pagebreak-before">         "fielddata_breaker": {
            "maximum_size_in_bytes": 623326003,
            "maximum_size": "594.4mb",
            "estimated_size_in_bytes": 0,
            "estimated_size": "0b",
            "overhead": 1.03,
            "tripped": 0
         }</pre>
</div>
<p>Here, you can determine the maximum circuit-breaker size (that is, the size
at which the circuit breaker will trip if a query attempts to use more memory).  This section
will also let you know the number of times the circuit breaker has been tripped, and
the currently configured overhead.  The overhead is used to pad estimates, because some queries are more difficult to estimate than others.</p>
<p>The main thing to watch is the <code class="literal">tripped</code> metric.  If this number is large or
consistently increasing, it’s a sign that your queries may need to be optimized
or that you may need to obtain more memory (either per box or by adding more
nodes).</p>
</div>

</div>
<div class="navfooter">
<span class="prev">
<a href="_cluster_health.html">« Cluster Health</a>
</span>
<span class="next">
<a href="_cluster_stats.html">Cluster Stats »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
